# Time how long it takes to define a grouped-sum query over the CSV dataset.
tic()
# Open the files under "data" as a dataset and build the query.
# collect() is not called here, so `ds` holds the query, not a data frame.
ds <- open_dataset("data", format = "csv") %>%
  ## YOUR QUERY HERE
  filter(UNITID == 130794) %>%
  group_by(AWLEVEL) %>%
  # Total CTOTALT within each AWLEVEL group, ignoring missing values.
  summarize(Degrees = sum(CTOTALT, na.rm = TRUE))
toc()
0.13 sec elapsed
## WHAT KIND OF OBJECT? OBJECT SIZE?
# Show the class of `ds`, then its in-memory size with auto-selected units.
class(ds)
print(object.size(ds), units = "auto")
[1] "arrow_dplyr_query"
8.2 Kb
Arrow Example in R
Get year with add_filename() and then collect() data into memory
# Start the timer for the full query, including pulling the result into memory.
# NOTE(review): no matching toc() is visible in this chunk -- confirm it follows.
tic()
open_dataset("data", format = "csv") %>%
  filter(UNITID == 130794, MAJORNUM == 1, CIPCODE == 99) %>%
  ## ADD YEAR FROM FILENAME TO DATASET
  mutate(
    # add_filename() records which source file each row came from.
    file_name = add_filename(),
    # Keep only the digits found between "c" and "_a.csv" in the file name.
    Year = gsub('.*c([0-9]+)_a.csv', '\\1', file_name)
  ) %>%
  group_by(Year) %>%
  # Total CTOTALT per Year, ignoring missing values.
  summarise(Degrees = sum(CTOTALT, na.rm = TRUE)) %>%
  collect() ## INTO MEMORY